Set-up

Load required packages or install as necessary.

library(readxl)
library(stringr)
library(ggplot2)
library(mgcv)
library(purrr)
library(tidyr)
library(forcats)
library(ggpmisc)
library(devtools)
library(readr)
library(ggpubr)
library(pracma)
library(dplyr)
library(lme4)
library(drc)
library(emmeans)
library(multcomp)
library(multcompView)
library(MuMIn)
library(effectsize)
library(ggrepel)
library(lessR)
library(broom)

# print R version (recorded for reproducibility of package behavior)
print(paste("R version:", R.version$version.string))
## [1] "R version: R version 4.3.0 (2023-04-21)"
# uncomment line below and run if you need to install any of these
# install.packages(c("readxl", "stringr", "ggplot2", "mgcv", "purrr", "tidyr", "forcats", "ggpmisc", "devtools", "readr", "ggpubr", "pracma", "dplyr", "lme4", "drc", "emmeans", "multcomp", "multcompView", "MuMIn", "effectsize", "ggrepel", "lessR", "broom"))

Load spreadsheets

We’re testing whether there are differences in fecundity of gravid females on three different hop cultivars: Cascade, Pacific Gem, and Nugget.

# Load cultivar screening data (one row per image per day-post-infestation)
df <- read_csv("./datasheets/mite_cultivar_screening_subset.csv")
## Rows: 250 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Image_name, Cultivar, Date_subdirectory, Class
## dbl (4): Run, Replicate, DPI, Ground_truth
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load other host and occlusion leaf testing data
df2 <- read_xlsx("./datasheets/concat_test_results_mite_models.xlsx")

# Exclude padded data (suboptimal performance compared to unpadded)
# NOTE(review): Padded is compared against the string "TRUE"; if the column is
# logical this relies on implicit logical-to-character coercion — confirm the
# column type read in from the xlsx sheet.
df2 <- df2 %>% 
  filter(Padded != "TRUE")

# Load miticidal data (base read.csv here vs readr::read_csv above)
df3<-read.csv("./datasheets/floramite_day_5.csv")

Cultivar fecundity testing

Calculate area under curve and model fecundity data for the cultivar screening

# Calculate individual-level AUC (each Image_name is a replicate)
# arrange(DPI) guarantees trapz() integrates egg counts in time order;
# factor(Run) makes both models below treat run as categorical.
individual_auc <- df %>%
  group_by(Cultivar, Image_name, Run) %>%
  arrange(DPI) %>%
  mutate(Run = factor(Run)) %>%
  summarize(AUC = trapz(DPI, Ground_truth)) %>%
  ungroup()
## `summarise()` has grouped output by 'Cultivar', 'Image_name'. You can override
## using the `.groups` argument.
# pull out cultivars with three or more runs
individual_auc <- individual_auc %>%
  filter(Cultivar %in% c("Nugget", "Pacific_Gem", "Cascade"))

# run is fixed
m1<-lm(AUC ~ Cultivar + Run, data = individual_auc)

# run is random (more typical)
# The singular-fit warning means the run variance is estimated at 0
# (see Std.Dev. = 0.00 for Run in the m2 output below).
m2 <- lmer(AUC ~ Cultivar + (1 | Run), data = individual_auc)
## boundary (singular) fit: see help('isSingular')
# look at residuals
resid_vals <- resid(m1)
resid_vals_m2 <- resid(m2)


m1
## 
## Call:
## lm(formula = AUC ~ Cultivar + Run, data = individual_auc)
## 
## Coefficients:
##         (Intercept)       CultivarNugget  CultivarPacific_Gem  
##             124.399              -30.760              -17.023  
##                Run2                 Run3  
##               5.361                5.076
m2
## Linear mixed model fit by REML ['lmerMod']
## Formula: AUC ~ Cultivar + (1 | Run)
##    Data: individual_auc
## REML criterion at convergence: 410.7902
## Random effects:
##  Groups   Name        Std.Dev.
##  Run      (Intercept)  0.00   
##  Residual             26.12   
## Number of obs: 46, groups:  Run, 3
## Fixed Effects:
##         (Intercept)       CultivarNugget  CultivarPacific_Gem  
##              127.34               -30.02               -16.34  
## optimizer (nloptwrap) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings

Look at effect size

# R² for m1 (an lm — no random effects, so R2m equals R2c by construction)
r2 <- r.squaredGLMM(m1)

# marginal R² (fixed effects only) and conditional R² (fixed + random effects)
# for the mixed model; here they coincide because the run variance is 0
r2_2 <- r.squaredGLMM(m2)

# standardized coefficients (like beta weights or Cohen's d)
std <- standardize_parameters(m1)
std_2 <- standardize_parameters(m2)
## boundary (singular) fit: see help('isSingular')
print("run as a fixed effect")
## [1] "run as a fixed effect"
std
## # Standardization method: refit
## 
## Parameter              | Std. Coef. |         95% CI
## ----------------------------------------------------
## (Intercept)            |       0.45 | [-0.13,  1.02]
## Cultivar [Nugget]      |      -1.08 | [-1.74, -0.42]
## Cultivar [Pacific_Gem] |      -0.60 | [-1.30,  0.11]
## Run [2]                |       0.19 | [-0.48,  0.86]
## Run [3]                |       0.18 | [-0.52,  0.87]
print("run as a random effect")
## [1] "run as a random effect"
std_2
## # Standardization method: refit
## 
## Parameter              | Std. Coef. |         95% CI
## ----------------------------------------------------
## (Intercept)            |       0.55 | [ 0.09,  1.01]
## Cultivar [Nugget]      |      -1.05 | [-1.69, -0.41]
## Cultivar [Pacific_Gem] |      -0.57 | [-1.26,  0.12]
print("run as a fixed effect")
## [1] "run as a fixed effect"
r2
##            R2m       R2c
## [1,] 0.1948162 0.1948162
print("run as a random effect")
## [1] "run as a random effect"
r2_2
##           R2m      R2c
## [1,] 0.194972 0.194972

Normality checks

# plot individual_auc: histogram with kernel density overlay
hist(individual_auc$AUC,
     breaks = 15,
     probability = TRUE,
     main = "Histogram of individual AUC",
     xlab = "AUC",
     col = "lightgray")
lines(density(individual_auc$AUC), lwd = 2, col = "blue")

qqnorm(individual_auc$AUC,
       main = "QQ-Plot of individual AUC")
qqline(individual_auc$AUC, col = "red", lwd = 2)

# Per-cultivar summary stats plus Shapiro-Wilk normality test;
# shapiro.test() is only valid for 3-5000 observations, hence the guard.
by_cultivar_sw <- individual_auc %>%
  group_by(Cultivar) %>%
  summarize(
    N       = n(),
    mean    = mean(AUC),
    median  = median(AUC),
    sd      = sd(AUC),
    shapiro_p = if (n() >= 3 && n() <= 5000) {
      shapiro.test(AUC)$p.value
    } else {
      NA_real_
    }
  )

print(by_cultivar_sw)
## # A tibble: 3 × 6
##   Cultivar        N  mean median    sd shapiro_p
##   <chr>       <int> <dbl>  <dbl> <dbl>     <dbl>
## 1 Cascade        16 127.    121   26.9    0.257 
## 2 Nugget         17  97.3    91   17.1    0.0883
## 3 Pacific_Gem    13 111     112.  33.9    0.424
# Residuals QQ‐plot (model-level check, complements the per-group tests above)
qqnorm(resid_vals); qqline(resid_vals)

# Shapiro on residuals
shapiro.test(resid_vals)
## 
##  Shapiro-Wilk normality test
## 
## data:  resid_vals
## W = 0.97268, p-value = 0.3471

LMM modeling with Nugget, Pgem, and Cascade

# The "Tukey" adjustment is the classic choice when there are more than two
# treatments and we want all pairwise contrasts at a family-wise error rate of
# 0.05. Because the three runs are accounted for (fixed in m1, random in m2),
# the standard errors and denominator degrees of freedom come out correctly.
# (This is sometimes called the "Tukey-Kramer" adjustment when sample sizes
# differ between groups.)
emm <- emmeans(m1, ~ Cultivar) 
emm2 <- emmeans(m2, ~ Cultivar) 

tuk_pairs <- pairs(emm, adjust = "tukey")
tuk_pairs2 <- pairs(emm2, adjust = "tukey")

tuk_df <- summary(tuk_pairs)
tuk_df2 <- summary(tuk_pairs2)

# NOTE(review): cld() is applied to the pairs (contrast) objects here, so the
# .group letters below are attached to contrasts rather than to cultivar means;
# cld(emm) would be the more conventional call — confirm this was intended.
cld_df <- cld(tuk_pairs,
              Letters = letters,
              sort    = TRUE)
cld_df2 <- cld(tuk_pairs2,
              Letters = letters,
              sort    = TRUE)

#tuk_glht <- glht(m1, linfct = mcp(Cultivar = "Tukey"))
#cld_df <- cld(tuk_glht, Letters = letters)

# run as a fixed effect
print("This model treats run as a fixed effect:")
## [1] "This model treats run as a fixed effect:"
cld_df
##  contrast              estimate    SE df t.ratio p.value .group
##  Nugget - Pacific_Gem     -13.7  9.82 41  -1.399  0.3506  a    
##  Cascade - Pacific_Gem     17.0 10.00 41   1.703  0.2164   b   
##  Cascade - Nugget          30.8  9.36 41   3.288  0.0058   b   
## 
## Results are averaged over the levels of: Run 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## significance level used: alpha = 0.05 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.
# run as a random effect
print("This model treats run as a random effect:")
## [1] "This model treats run as a random effect:"
cld_df2
##  contrast              estimate   SE   df t.ratio p.value .group
##  Nugget - Pacific_Gem     -13.7 9.64 41.1  -1.419  0.3407  a    
##  Cascade - Pacific_Gem     16.3 9.88 41.9   1.655  0.2346   b   
##  Cascade - Nugget          30.0 9.26 42.2   3.240  0.0065   b   
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## significance level used: alpha = 0.05 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.

Plot cultivar differences

# Per-cultivar maximum AUC, used later to position significance letters
max_auc <- individual_auc %>%
  group_by(Cultivar) %>%
  summarize(maxAUC = max(AUC), .groups = "drop")


individual_auc$Cultivar <- factor(individual_auc$Cultivar)
cultivars <- levels(individual_auc$Cultivar)
n <- length(cultivars)

n
## [1] 3
# Build a symmetric matrix of Tukey-adjusted p-values so multcompLetters()
# can derive compact letter groupings per cultivar (not per contrast).
p_mat <- matrix(NA, nrow = n, ncol = n,
                dimnames = list(cultivars, cultivars))

for (i in seq_len(nrow(tuk_df))) {
  # contrast strings look like "Cascade - Nugget"; split into the two names
  pair_names <- strsplit(tuk_df$contrast[i], " - ")[[1]]
  C1 <- pair_names[1]
  C2 <- pair_names[2]
  pval <- tuk_df$p.value[i]
  
  # Place pval into symmetric positions
  p_mat[C1, C2] <- pval
  p_mat[C2, C1] <- pval
}

diag(p_mat) <- NA
print(p_mat)
##                 Cascade      Nugget Pacific_Gem
## Cascade              NA 0.005767551   0.2164492
## Nugget      0.005767551          NA   0.3506273
## Pacific_Gem 0.216449222 0.350627328          NA
letters_named <- multcompLetters(p_mat)$Letters
print(letters_named)
##     Cascade      Nugget Pacific_Gem 
##         "a"         "b"        "ab"
# Tidy the named letter vector into a data frame for joining/plotting
cl_per_cult <- data.frame(
  Cultivar = names(letters_named),
  .group   = unname(letters_named),
  stringsAsFactors = FALSE
)

# recompute per-cultivar max AUC (same as above; kept for self-containment)
max_auc <- individual_auc %>%
  group_by(Cultivar) %>%
  summarize(maxAUC = max(AUC), .groups = "drop")

# place each letter slightly above that cultivar's highest point
letter_positions <- cl_per_cult %>%
  left_join(max_auc, by = "Cultivar") %>%
  mutate(y_pos = maxAUC + 10)

desired_order <- cl_per_cult$Cultivar

individual_auc <- individual_auc %>%
  mutate(Cultivar = factor(Cultivar, levels = desired_order))

letter_positions <- letter_positions %>%
  mutate(Cultivar = factor(Cultivar, levels = desired_order))

# set order of cultivar to Cascade, Pacific_Gem, and then Nugget
# (this overrides the desired_order set just above — intentional display order)
individual_auc$Cultivar <- factor(individual_auc$Cultivar, levels = c("Cascade", "Pacific_Gem", "Nugget"))

ggplot(individual_auc, aes(x = Cultivar, y = AUC, fill = Cultivar)) +
  geom_boxplot(width = 0.6, outlier.shape = NA, alpha = 0.6) +
  geom_jitter(width = 0.1, size = 2, alpha = 0.8, shape = 21, color = "black") +
  scale_fill_viridis_d(
    name = "Cultivar",
  ) +
  # Make y_pos = maxAUC + 5% of the data range
  geom_text(
    data = letter_positions %>%
             mutate(y_pos = maxAUC + 0.05 * (max(individual_auc$AUC) - min(individual_auc$AUC))),
    aes(x = Cultivar, y = y_pos, label = .group),
    size = 5,
    fontface = "bold",
    color = "black"
  ) +
  labs(
    title    = "Area Under Curve (AUC) of Egg Laying \n Rate by Cultivar (5 days)",
    x        = "Hop Cultivar",
    y        = "AUC (egg laying over 5 days)",
#    caption  = "Letters: Tukey HSD groupings (α = 0.05), n = 18 per cultivar"
  ) +
  theme_classic(base_size = 14) +
  theme(
    axis.title           = element_text(face = "bold"),
    axis.text.x          = element_text(angle = 20, hjust = 1),
    axis.text.y          = element_text(size = 12),
    legend.position      = "none",
    plot.title           = element_text(size = 14, face = "bold", hjust = 0.5),
    plot.caption         = element_text(size = 9, hjust = 0)
  ) 

# save plot
ggsave("AUC_by_cultivar.png", width = 5, height = 6, dpi = 300)

Pgem Nug Cas test set

# subset just the images acquired when testing the fecundity system
# NOTE(review): the "cascade_mites_rnd2" filter is redundant — the broader
# "cascade_mites" pattern on the next line already matches those names.
df_leaf_assay_test_set <- df2 %>%
  dplyr::filter(!grepl("cascade_mites_rnd2", Image_name)) %>%  # these were from GH samples, not the fecundity testing hence excluding them
  dplyr::filter(!grepl("cascade_mites", Image_name)) %>% # these were from GH samples, not the fecundity testing hence excluding them
  dplyr::filter(!grepl("nugget_mites", Image_name)) %>% # these were from GH samples, not the fecundity testing hence excluding them
  dplyr::filter(Cultivar %in% c("Cascade", "Pacific_Gem", "Nugget"))

# run pearsons correlation on cultivar/class/detections vs cultivar/class/groundtruth
pearson_results <- df_leaf_assay_test_set %>%
   dplyr::filter(!Class == "Immature")  %>% # not enough observations for this class, will return errors; (style: Class != "Immature" is the idiomatic form)
  group_by(Model_version, Cultivar, Class) %>%
  summarize(
    r = cor(Total_GT, Total_detections, method = "pearson"),
    p_val = cor.test(Total_GT, Total_detections, method = "pearson")$p.value,
    total_gt = sum(Total_GT),
    n_images = n()
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'Model_version', 'Cultivar'. You can
## override using the `.groups` argument.
# note NA typically indicates perfect agreement (zero variance in one vector)
pearson_results
## # A tibble: 18 × 7
##    Model_version Cultivar    Class             r     p_val total_gt n_images
##            <dbl> <chr>       <chr>         <dbl>     <dbl>    <dbl>    <int>
##  1           209 Cascade     Adult_female NA     NA              22       22
##  2           209 Cascade     Viable_egg    0.999  1.73e-26      604       21
##  3           209 Nugget      Adult_female NA     NA              13       13
##  4           209 Nugget      Viable_egg    0.999  1.39e-16      328       13
##  5           209 Pacific_Gem Adult_female NA     NA              14       14
##  6           209 Pacific_Gem Viable_egg    0.982  3.29e-12      357       17
##  7           210 Cascade     Adult_female NA     NA              22       22
##  8           210 Cascade     Viable_egg    0.999  8.26e-27      604       21
##  9           210 Nugget      Adult_female NA     NA              13       13
## 10           210 Nugget      Viable_egg    0.999  1.70e-16      328       13
## 11           210 Pacific_Gem Adult_female NA     NA              14       14
## 12           210 Pacific_Gem Viable_egg    0.969  1.79e-10      357       17
## 13           211 Cascade     Adult_female NA     NA              22       22
## 14           211 Cascade     Viable_egg    0.999  2.07e-26      604       21
## 15           211 Nugget      Adult_female NA     NA              13       13
## 16           211 Nugget      Viable_egg    0.999  1.33e-15      328       13
## 17           211 Pacific_Gem Adult_female NA     NA              14       14
## 18           211 Pacific_Gem Viable_egg    0.975  2.99e-11      357       17
# you can check why it's NA by filtering for the combo that gave it to see why...
# (Model_version is numeric; comparing to "209" works via implicit coercion)
df2 %>% 
  filter(Cultivar == "Pacific_Gem", Class == "Adult_female", Model_version == "209") %>%
  dplyr::select(Total_GT, Total_detections)
## # A tibble: 14 × 2
##    Total_GT Total_detections
##       <dbl>            <dbl>
##  1        1                1
##  2        1                1
##  3        1                1
##  4        1                1
##  5        1                1
##  6        1                1
##  7        1                1
##  8        1                1
##  9        1                1
## 10        1                1
## 11        1                1
## 12        1                1
## 13        1                1
## 14        1                1
# plot ground truth vs predicted
palette <- c("#F3B341","#6F5EE7","#CB377D","#EC6A2C","#668DF5")

# fit linear model per class and annotate equation + R^2 per facet.
# after_stat() replaces the ..eq.label.. dot-dot notation, which was
# deprecated in ggplot2 3.4.0 and removed in later releases.
df_leaf_assay_test_set %>%
  ggplot(aes(x = Total_GT, y = Total_detections, color = Class)) +
  geom_point() +
  # dashed 1:1 line = perfect agreement between detections and ground truth
  geom_abline(slope = 1, intercept = 0, color = "black", linetype = "dashed") +
  geom_smooth(method = "lm", se = FALSE) +
  stat_poly_eq(
    aes(
      label = paste(after_stat(eq.label), after_stat(rr.label), sep = "~~~")
    ),
    formula = y ~ x,
    parse = TRUE,
    color = "black"
  ) +
  facet_wrap(Model_version~Cultivar) +
  labs(
    title = "Ground Truth vs. Predicted Detections (Subset of Leaf Fecundity Data)",
    x = "Ground Truth",
    y = "Computer Vision Detections Split by Model Version"
  ) +
  theme_classic() +
  scale_color_manual(values = palette)
## `geom_smooth()` using formula = 'y ~ x'

# save image (ggsave re-renders the last plot, hence the repeated message)
ggsave("ground_truth_vs_predicted_split_by_cv_pgem_nug_cas_lm.pdf", width = 10, height = 10, dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
# Pearson's correlation annotated per facet (ggpubr::stat_cor)
df_leaf_assay_test_set %>%
  ggplot(aes(x = Total_GT, y = Total_detections, color = Class)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, color = "black", linetype = "dashed") +
  stat_cor(
    method = "pearson",
    aes(group = interaction(Model_version, Cultivar)),  # ensure facet-wise stats
    label.x.npc = "left",
    label.y.npc = "top",
    size = 3.5,
    color = "black",
    parse = FALSE  # set only here, outside aes()
  ) +
  facet_wrap(Model_version ~ Cultivar) +
  labs(
    title = "Ground Truth vs. Predicted Detections (Pearson's R)",
    x = "Ground Truth",
    y = "Computer Vision Detections"
  ) +
  theme_classic() +
  scale_color_manual(values = palette)

# save image
ggsave("ground_truth_vs_predicted_split_by_cv_pgem_nug_cas.svg", width = 10, height = 10, dpi = 300)

Compare ground truth vs predicted

# plot ground truth vs predicted, one facet per model version, all cultivars
# overlaid (shape distinguishes cultivar, color distinguishes class).
# after_stat() replaces the ..eq.label.. dot-dot notation, which was
# deprecated in ggplot2 3.4.0 and removed in later releases.
df_leaf_assay_test_set %>%
  dplyr::filter(Cultivar %in% c("Pacific_Gem","Cascade","Nugget")) %>%
  ggplot(aes(x = Total_GT, y = Total_detections, color = Class)) +
  geom_point(aes(shape = Cultivar), alpha = 0.8) +
  # dashed 1:1 line = perfect agreement
  geom_abline(slope = 1, intercept = 0, color = "black", linetype = "dashed") +
  geom_smooth(method = "lm", se = FALSE, alpha = 0.8) +
  stat_poly_eq(
    aes(
      label = paste(after_stat(eq.label), after_stat(rr.label), sep = "~~~")
    ),
    formula = y ~ x,
    parse = TRUE,
    color = "black"
  ) +
  facet_wrap(~Model_version) +
  labs(
    title = "Ground Truth vs. Detections (Pacific Gem, Nugget, Cascade Fecundity Test Set)",
    x = "Ground Truth",
    y = "Computer Vision Detections Split by Model Version"
  ) +
  theme_classic() +
  scale_color_manual(values = palette)
## `geom_smooth()` using formula = 'y ~ x'

# save image (ggsave re-renders the last plot, hence the repeated message)
ggsave("ground_truth_vs_predicted_cas_nug_pgem_test_set_grouped.png", width = 12, height = 6, dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
# calculate mean precision and recall for each cultivar, split by class
df_leaf_assay_test_set %>% 
  group_by(Model_version, Cultivar, Class) %>%
  summarize(
    mean_precision = mean(Precision, na.rm = TRUE),
    mean_recall = mean(Recall, na.rm = TRUE),
    n = n()
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'Model_version', 'Cultivar'. You can
## override using the `.groups` argument.
## # A tibble: 27 × 6
##    Model_version Cultivar    Class        mean_precision mean_recall     n
##            <dbl> <chr>       <chr>                 <dbl>       <dbl> <int>
##  1           209 Cascade     Adult_female          0.955       0.955    22
##  2           209 Cascade     Immature              1           0.893     1
##  3           209 Cascade     Viable_egg            0.997       0.983    21
##  4           209 Nugget      Adult_female          1           1        13
##  5           209 Nugget      Immature              0           0         1
##  6           209 Nugget      Viable_egg            0.983       0.983    13
##  7           209 Pacific_Gem Adult_female          1           1        14
##  8           209 Pacific_Gem Immature              0.667       0.417     3
##  9           209 Pacific_Gem Viable_egg            0.912       0.823    17
## 10           210 Cascade     Adult_female          0.955       0.955    22
## # ℹ 17 more rows
# same summary collapsed over Class, with total object counts per cultivar
df_leaf_assay_test_set %>% 
  group_by(Model_version, Cultivar) %>%
  summarize(
    mean_precision = mean(Precision, na.rm = TRUE),
    mean_recall = mean(Recall, na.rm = TRUE),
    sum_objects = sum(Number_objects_in_image),
    n = n()
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'Model_version'. You can override using the
## `.groups` argument.
## # A tibble: 9 × 6
##   Model_version Cultivar    mean_precision mean_recall sum_objects     n
##           <dbl> <chr>                <dbl>       <dbl>       <dbl> <int>
## 1           209 Cascade              0.976       0.967        1318    44
## 2           209 Nugget               0.955       0.955         739    27
## 3           209 Pacific_Gem          0.926       0.860         768    34
## 4           210 Cascade              0.977       0.973        1318    44
## 5           210 Nugget               0.988       0.965         739    27
## 6           210 Pacific_Gem          0.952       0.915         768    34
## 7           211 Cascade              0.973       0.962        1318    44
## 8           211 Nugget               0.992       0.966         739    27
## 9           211 Pacific_Gem          0.954       0.863         768    34

Object occlusion and other host leaf test set

Look at how overall precision changes with more objects.

# keep only Image_Names found in all three models
df_sub <- df2 %>%
  group_by(Image_name) %>%
  filter(n_distinct(Model_version) >= 3) %>%
  ungroup()

# dont want to overplot, so dropping columns for now
# if I didn't drop the extra columns I'd have duplicate data points since its in long format (by class)
df_sub_pre_recall_unique <- df_sub %>%
  dplyr::select(c(Model_version,
                  Image_name,
                  Number_objects_in_image,
                  Host,
                  Cultivar,
                  Mite_Brush_or_Leaf,
                  Padded,
                  Overall_Recall,
                  Overall_Precision)) %>%
  unique() %>%
  filter(!Host %in% c("Apple", "Nasturtium", "Pear")) # lets leave out the unseen data for now


# add a column "pretrained" and set all values for all models to "TRUE" (for later)
df_sub_pre_recall_unique <- df_sub_pre_recall_unique %>%
  mutate(Pretrained = TRUE)
  
# Precision for entire leaf set (seen hosts only)
# Figure 6A, left panel
# jitter height of 0.02 avoids stacking identical precision values
p3 <- df_sub_pre_recall_unique %>%
  ggplot(., aes(x = Number_objects_in_image, y = Overall_Precision)) +
  geom_jitter(alpha = 0.5, width = 0.2, height = 0.02) +
  geom_smooth(method = "loess") +
  facet_grid(Model_version ~ Padded, scales = "free_x") +
  ggtitle("Precision vs. Number of Objects in Image") +
  xlab("Number of Ground Truth Objects") +
  ylab("Per-Image Precision") +
  theme_classic() + 
  theme(
    strip.text = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

p3
## `geom_smooth()` using formula = 'y ~ x'

# Recall for entire leaf set (seen hosts only) 
# Figure 6B, left panel
p4<-df_sub_pre_recall_unique %>%
  ggplot(., aes(x = Number_objects_in_image, y = Overall_Recall)) +
  geom_jitter(alpha = 0.5, width = 0.2, height = 0.02) +
  geom_smooth(method = "loess") +
  facet_grid(Model_version ~ Padded, scales = "free_x") +
  ggtitle("Recall vs. Number of Objects in Image") +
  xlab("Number of Ground Truth Objects") +
  ylab("Per-Image Recall") +
  theme_classic() + 
  theme(
    strip.text = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

p4
## `geom_smooth()` using formula = 'y ~ x'

# save (note: ggsave saves the last displayed plot, i.e. p4 here)
ggsave("precision_vs_number_objects.png", width = 8, height = 6, dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# count number of unique image_names per model/host combination
df_sub_pre_recall_unique %>%
  group_by(Model_version, Host) %>%
  summarise(n_unique_image_names = n_distinct(Image_name), .groups = "drop") %>%
  arrange(desc(n_unique_image_names))
## # A tibble: 21 × 3
##    Model_version Host       n_unique_image_names
##            <dbl> <chr>                     <int>
##  1           209 Hop                         121
##  2           210 Hop                         121
##  3           211 Hop                         121
##  4           209 Strawberry                   21
##  5           210 Strawberry                   21
##  6           211 Strawberry                   21
##  7           209 Bean                         16
##  8           210 Bean                         16
##  9           211 Bean                         16
## 10           209 Tomato                       14
## # ℹ 11 more rows

Run Correlation Tests on Precision and Recall Vs. Number of Objects

Correlation Tests (Pearson/Spearman style): - Computes correlation (r) and significance (p-value) between number of objects and both precision and recall.
- Results are grouped by model version

# Correlation test for precision vs. object count (seen hosts only).
# NOTE(review): the section heading says "Spearman" but method = "pearson" is
# used here and below — confirm which correlation was intended.
df_sub_pre_recall_unique %>%
  group_by(Model_version) %>%
  summarize(
    r     = cor(Number_objects_in_image, Overall_Precision, method = "pearson"),
    p_val = cor.test(Number_objects_in_image, Overall_Precision, method = "pearson")$p.value, 
    n_unique_image_names = n_distinct(Image_name),
  )
## # A tibble: 3 × 4
##   Model_version       r p_val n_unique_image_names
##           <dbl>   <dbl> <dbl>                <int>
## 1           209  0.0125 0.864                  190
## 2           210  0.116  0.110                  190
## 3           211 -0.0146 0.841                  190
# Correlation test for recall vs. object count (seen hosts only); same
# Spearman-vs-Pearson caveat as above.
df_sub_pre_recall_unique %>%
  group_by(Model_version) %>%
  summarize(
    r     = cor(Number_objects_in_image, Overall_Recall, method = "pearson"),
    p_val = cor.test(Number_objects_in_image, Overall_Recall, method = "pearson")$p.value, 
    n_unique_image_names = n_distinct(Image_name),
  )
## # A tibble: 3 × 4
##   Model_version       r  p_val n_unique_image_names
##           <dbl>   <dbl>  <dbl>                <int>
## 1           209  0.0399 0.585                   190
## 2           210 -0.0668 0.360                   190
## 3           211 -0.126  0.0837                  190

Run Generalized Additive Models on Precision and Recall Vs. Number of Objects (S1 Fig)

Generalized Additive Models (GAMs): - Fits non-linear models predicting recall and precision from number of objects, controlling for host and model version.
- Produces diagnostic summaries and effect plots for interpretation.
- Plots are exported as vector PDFs for publication-quality graphics (S1 Fig).

# drop hosts with less than 10 rows (too few images for stable GAM estimates)
df_sub_pre_recall_unique %>% 
  group_by(Model_version, Host) %>%
  filter(n() >= 10) %>%
  ungroup() -> df_sub_pre_recall_unique_dropped_host_below_10

# Generalized Additive Model (GAM) with response overall recall, fixed effects
# for host and model version, smooth term for number of objects (nonlinear
# effect of object count).
# NOTE(review): Model_version is numeric (209/210/211), so it enters as a
# continuous covariate with a single slope; factor(Model_version) would give
# per-version effects — confirm which treatment was intended.
gam1 <- gam(Overall_Recall ~ s(Number_objects_in_image) + Host + Model_version, data = df_sub_pre_recall_unique_dropped_host_below_10)
summary(gam1)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Overall_Recall ~ s(Number_objects_in_image) + Host + Model_version
## 
## Parametric coefficients:
##                 Estimate Std. Error t value    Pr(>|t|)    
## (Intercept)    -0.512086   1.242232  -0.412      0.6803    
## HostHop        -0.031677   0.017736  -1.786      0.0747 .  
## HostRaspberry  -0.136514   0.026793  -5.095 0.000000484 ***
## HostStrawberry -0.026131   0.021797  -1.199      0.2311    
## HostTomato     -0.127993   0.023874  -5.361 0.000000123 ***
## Model_version   0.006933   0.005915   1.172      0.2417    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf Ref.df    F p-value  
## s(Number_objects_in_image) 3.565  4.427 2.59  0.0304 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0998   Deviance explained = 11.4%
## GCV = 0.01296  Scale est. = 0.012733  n = 546
plot(gam1, select = 1)

# Save GAM plot as PDF (vector graphic)
pdf("gam_plot_recall.pdf", width = 5, height = 4)  # size in inches
plot(gam1, select = 1)
dev.off()
## quartz_off_screen 
##                 2
# Generalized Additive Model (GAM) with response overall precision, fixed
# effects for host and model version, smooth term for number of objects
# (nonlinear effect of object count). Same caveat as the recall GAM:
# Model_version is fit as a continuous numeric covariate.
gam2 <- gam(Overall_Precision ~ s(Number_objects_in_image) + Host + Model_version, data = df_sub_pre_recall_unique_dropped_host_below_10)
summary(gam2)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Overall_Precision ~ s(Number_objects_in_image) + Host + Model_version
## 
## Parametric coefficients:
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     0.392273   0.576939   0.680   0.4968  
## HostHop         0.011429   0.008219   1.391   0.1649  
## HostRaspberry   0.006329   0.012395   0.511   0.6098  
## HostStrawberry  0.015487   0.010110   1.532   0.1261  
## HostTomato     -0.025453   0.011086  -2.296   0.0221 *
## Model_version   0.002742   0.002747   0.998   0.3187  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf Ref.df     F p-value
## s(Number_objects_in_image) 3.127  3.904 0.703   0.589
## 
## R-sq.(adj) =  0.0385   Deviance explained = 5.28%
## GCV = 0.0027933  Scale est. = 0.0027466  n = 546
plot(gam2, select = 1)

# Save GAM plot as PDF (vector graphic)
pdf("gam_plot_precision.pdf", width = 5, height = 4)  # size in inches
plot(gam2, select = 1)
dev.off()
## quartz_off_screen 
##                 2
# check host counts for consistency (one model version is representative)
df_sub_pre_recall_unique_dropped_host_below_10 %>%
  dplyr::filter(Model_version == 209) %>%
  group_by(Model_version, Host) %>%
  summarise(n_unique_image_names = n_distinct(Image_name), .groups = "drop") %>%
  arrange(desc(n_unique_image_names))
## # A tibble: 5 × 3
##   Model_version Host       n_unique_image_names
##           <dbl> <chr>                     <int>
## 1           209 Hop                         121
## 2           209 Strawberry                   21
## 3           209 Bean                         16
## 4           209 Tomato                       14
## 5           209 Raspberry                    10

Looking at precision and recall by host

# group by model, and host and find mean overall precision 
df_sub_pre_recall_unique_dropped_host_below_10 <- df_sub_pre_recall_unique_dropped_host_below_10 %>%
  group_by(Model_version, Host, Mite_Brush_or_Leaf) %>%
  mutate(
    Mean_overall_Precision = mean(Overall_Precision, na.rm = TRUE),
    Mean_overall_Recall = mean(Overall_Recall, na.rm = TRUE)) %>%
  ungroup()

# Precompute counts
host_counts <- df_sub_pre_recall_unique_dropped_host_below_10 %>%
  group_by(Host, Model_version) %>%
  summarise(n = n(), .groups = "drop")

# Merge counts into main dataframe
df_labeled <- df_sub_pre_recall_unique_dropped_host_below_10 %>%
  left_join(host_counts, by = c("Host", "Model_version"))

# order hosts by mean recall (makes for prettier boxplots)
df_labeled <- df_labeled %>%
  group_by(Host, Model_version) %>%
  mutate(
    Mean_overall_Recall = mean(Overall_Recall, na.rm = TRUE),
    Host_reordered = fct_reorder(Host, Mean_overall_Recall)
  ) %>%
  ungroup()

# create a new df for dynamic labels in ggplot
# y_label places each "n = ..." just below the lowest point, clamped at 0
df_precision_labels <- df_labeled %>%
  group_by(Host, Model_version, Padded) %>%
  summarise(
    min_precision = min(Overall_Precision, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  mutate(
    y_label = pmax(0, min_precision - 0.2),
    Host_reordered = fct_reorder(Host, min_precision)  # or use mean if preferred
  )

# Add Host_reordered to main data (re-ordered here by mean precision,
# overwriting the recall-based ordering above)
df_labeled <- df_labeled %>%
  mutate(Host_reordered = fct_reorder(Host, Mean_overall_Precision))

# count unique file names per model_version
# (drop duplicate rows so each image contributes one point per model/padding)
df_labeled_unique <- df_labeled %>%
  distinct(Model_version, Image_name, Padded, .keep_all = TRUE)
# Final precision plot: boxplots per host (flipped), faceted by model version,
# with per-group sample sizes drawn below each box
ggplot(df_labeled_unique, aes(x = Host_reordered, y = Overall_Precision)) +
  geom_boxplot() +
  geom_jitter(aes(fill = Host), alpha = 0.5, width = 0.2, height = 0.02, shape=21) +
  geom_text(
    data = df_precision_labels,
    aes(x = Host_reordered, y = y_label, label = paste0("n = ", n)),
    inherit.aes = FALSE,
    size = 3
  ) +
  coord_flip() +
  facet_wrap(~Model_version) +
  ylim(0, 1.05) +
  theme_classic() +
  ggtitle("Precision by Host") +
  ylab("Precision") +
  xlab("Host") +
  theme(legend.position = "none") +
  scale_fill_viridis_d()

# save last plot
#ggsave("precision_by_host_boxplot.png", width = 8, height = 4, dpi = 300)

# Per-(Host, Model_version) recall label positions: each "n = ..." label sits
# just below that group's minimum recall, clamped at 0 to stay on-axis.
# Fix: min() now uses na.rm = TRUE, matching the precision-label block above;
# without it a single NA recall made min_recall (and hence label_y) NA and the
# label was silently dropped from the plot.
df_labels <- df_labeled %>%
  group_by(Host, Model_version, Host_reordered) %>%
  summarise(
    min_recall = min(Overall_Recall, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  mutate(label_y = pmax(0, min_recall - 0.125))

# Plot with dynamic label placement
# Recall boxplots by host, faceted by padding status (rows) x model version
# (columns); hosts ordered by recall so boxes read low-to-high after the flip.
ggplot(df_labeled, aes(x = fct_reorder(Host, Overall_Recall), y = Overall_Recall)) +
  geom_boxplot() +
geom_jitter(aes(fill = Host), 
            position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.75), 
            alpha = 0.5,
            shape=21) +
  geom_text(
    data = df_labels,
    aes(x = Host_reordered, y = label_y, label = paste0("n = ", n)),
    inherit.aes = FALSE,
    size = 3
  ) +
  coord_flip() +
  facet_grid(Padded~Model_version) +
  ylim(0, 1) +
  theme_classic() +
  ggtitle("Recall by Host") +
  ylab("Recall") +
  theme(
    legend.position = "none",
    axis.title.y = element_blank()
  )+
  scale_fill_viridis_d()

# save
# NOTE(review): this ggsave() is active while every other save call in this
# document is commented out -- confirm the .svg write is intentional.
ggsave("recall_by_host_boxplot.svg", width = 7, height = 4, dpi = 300)

# rough model comparison for precision
# Fix: the original passed aes(fill = "black"), which maps the literal string
# "black" as a constant data value rather than setting a colour; with the
# default point shape the fill is never drawn, so the mapping was a no-op,
# and its single implied group also made position_jitterdodge() dodge nothing.
# Plain jitter (height = 0, as jitterdodge used) renders the same points.
ggplot(df_sub_pre_recall_unique, aes(x = factor(Model_version), y = Overall_Precision)) +
  geom_boxplot() +
  geom_jitter(width = 0.2, height = 0) +
  ggtitle("Precision by Model Version") +
  theme_classic() +
  theme(legend.position = "none")

# rough model comparison for recall (same fix as the precision plot above)
ggplot(df_sub_pre_recall_unique, aes(x = factor(Model_version), y = Overall_Recall)) +
  geom_boxplot() +
  geom_jitter(width = 0.2, height = 0) +
  ggtitle("Recall by Model Version") +
  theme_classic() +
  theme(legend.position = "none")

Performance across hop cultivars

# Optional: filter to hop-related cultivar if necessary
# Fix 1: the original piped through na.omit(Cultivar); inside a pipe the data
# frame is the first argument and `Cultivar` is passed via `...` and silently
# ignored, so the call actually dropped rows with an NA in ANY column.
# Replaced with an explicit filter on the column the comment intended.
# Fix 2: the inline comment said "4 or more" unique file names while the code
# requires >= 10; the comment now matches the code.
df_hop <- df_sub_pre_recall_unique %>%
  dplyr::filter(Host == "Hop") %>%
  dplyr::filter(!is.na(Cultivar)) %>%
  group_by(Cultivar) %>%
  # only keep cultivars that have 10 or more unique file names
  filter(n_distinct(Image_name) >= 10) %>%
  ungroup()

# Per-cultivar precision on hop leaves, one facet per model version.
ggplot(df_hop, aes(x = fct_reorder(Cultivar, Overall_Precision), y = Overall_Precision)) +
  geom_boxplot() +
  geom_jitter(aes(fill = "black"),
              position = position_jitterdodge(jitter.width = 0.2), alpha = 0.5) +
  coord_flip() +
  facet_wrap(~Model_version) +
  ylim(0, 1) +
  theme_classic() +
  labs(title = "Precision by Hop Cultivar") +
  theme(legend.position = "none")

# Per-cultivar recall, same layout as the precision plot above.
ggplot(df_hop, aes(x = fct_reorder(Cultivar, Overall_Recall), y = Overall_Recall)) +
  geom_boxplot() +
  geom_jitter(aes(fill = "black"),
              position = position_jitterdodge(jitter.width = 0.2), alpha = 0.5) +
  coord_flip() +
  facet_wrap(~Model_version) +
  ylim(0, 1) +
  theme_classic() +
  labs(title = "Recall by Hop Cultivar") +
  theme(legend.position = "none")

# print number of unique filenames for each cultivar
unique_filenames <- df_hop %>%
  group_by(Cultivar) %>%
  summarize(unique_filenames = n_distinct(Image_name))

unique_filenames
## # A tibble: 3 × 2
##   Cultivar    unique_filenames
##   <chr>                  <int>
## 1 Cascade                   47
## 2 Nugget                    15
## 3 Pacific_Gem               17
# GAM: per-image recall as a smooth function of object count, with cultivar
# (reference level: Cascade) and model version as parametric terms.
# NOTE(review): Model_version enters as a numeric covariate -- the summary
# below shows a single slope for it. If the three versions are categories,
# factor(Model_version) may be intended; confirm.
gam3 <- gam(Overall_Recall ~ s(Number_objects_in_image) + Cultivar + Model_version, data = df_hop)
summary(gam3)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Overall_Recall ~ s(Number_objects_in_image) + Cultivar + Model_version
## 
## Parametric coefficients:
##                      Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)         -0.183524   1.531967  -0.120     0.905    
## CultivarNugget       0.025210   0.015710   1.605     0.110    
## CultivarPacific_Gem -0.065791   0.014984  -4.391 0.0000172 ***
## Model_version        0.005357   0.007295   0.734     0.463    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                            edf Ref.df     F p-value
## s(Number_objects_in_image)   1      1 0.029   0.866
## 
## R-sq.(adj) =  0.0914   Deviance explained = 10.7%
## GCV = 0.0085894  Scale est. = 0.0084082  n = 237
# Same model structure for per-image precision.
gam4 <- gam(Overall_Precision ~ s(Number_objects_in_image) + Cultivar + Model_version, data = df_hop)
summary(gam4)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## Overall_Precision ~ s(Number_objects_in_image) + Cultivar + Model_version
## 
## Parametric coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          0.487655   0.798979   0.610   0.5422  
## CultivarNugget       0.001463   0.008193   0.179   0.8585  
## CultivarPacific_Gem -0.015232   0.007815  -1.949   0.0525 .
## Model_version        0.002362   0.003805   0.621   0.5353  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                            edf Ref.df     F p-value
## s(Number_objects_in_image)   1      1 0.382   0.537
## 
## R-sq.(adj) =  0.00446   Deviance explained = 2.13%
## GCV = 0.0023363  Scale est. = 0.002287  n = 237

Run correlation analysis on hop test images

# Filter for only 'Hop' host
hop_data <- df2 %>%
  filter(Host == "Hop")

# print number of hop images in each model version
hop_data %>%
  group_by(Model_version) %>%
  summarise(n_images = n_distinct(Image_name))
## # A tibble: 3 × 2
##   Model_version n_images
##           <dbl>    <int>
## 1           209      122
## 2           210      121
## 3           211      122
# only keep images that are in all three model_versions
hop_data <- hop_data %>%
  group_by(Image_name) %>%
  filter(n_distinct(Model_version) >= 3) %>%
  ungroup()

# Group by Model_version and calculate correlations between ground-truth and
# predicted counts (Spearman = rank agreement, Pearson = linear agreement).
cor_results <- hop_data %>%
  group_by(Model_version) %>%
  summarise(
    spearman_rho = cor(Total_GT, Total_detections, method = "spearman", use = "complete.obs"),
    pearson_r = cor(Total_GT, Total_detections, method = "pearson", use = "complete.obs"),
    .groups = "drop"
  )

print(cor_results)
## # A tibble: 3 × 3
##   Model_version spearman_rho pearson_r
##           <dbl>        <dbl>     <dbl>
## 1           209        0.953     0.994
## 2           210        0.987     0.993
## 3           211        0.979     0.991
# Fix: replaced the ..r.. dot-dot notation (deprecated since ggplot2 3.4.0)
# with after_stat(r); rendering is unchanged.
ggplot(hop_data, aes(x = Total_GT, y = Total_detections)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE, color = "blue", linetype = "dashed") +
  stat_cor(method = "pearson", label.x = 1, label.y = max(hop_data$Total_detections, na.rm = TRUE), 
           aes(label = paste0("Pearsons: ", after_stat(r))), size = 3.5) +
  facet_wrap(~ Model_version) +
  theme_classic() +
  labs(
    title = "Ground Truth vs. Predicted Detections by Model Version",
    x = "Total Ground Truth Objects",
    y = "Total Predicted Detections"
  )
## `geom_smooth()` using formula = 'y ~ x'

ggsave("ground_truth_vs_predicted_hop_test_images.png", width = 8, height = 6, dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'

Pie chart looking at images in dataset

This includes all images – including those not used in training (or even testing).

# Data
# This includes all images -- including those not used in training (or even testing).
image_data <- data.frame(
  Image_Type = c("Hop", "Bean", "Synthetic", "Mite Brush", "Strawberry", 
                 "Tomato", "Raspberry", "Columbine", "Apple", "Nasturtium", 
                 "Pear", "Poppy", "CNC microscope"),
  Count = c(720, 277, 200, 189, 68, 39, 34, 31, 21, 16, 15, 10, 10)
)

# Compute percentages and labels
image_data <- image_data %>%
  arrange(desc(Count)) %>%
  mutate(Percent = Count / sum(Count),
         Label = paste0(Image_Type, "\n(", round(Percent * 100, 1), "%)"))

# convert to long data for lessR piechart (one row per image, replicating each
# type Count times, since PieChart() tabulates raw observations)
long_image_data <- image_data[rep(1:nrow(image_data), times = image_data$Count), , drop = FALSE]

# group image types making up less than 2% of the dataset into an "Other"
# category (the original comment said "1% or less", but the threshold is 0.02)
long_image_data <- long_image_data %>%
  mutate(Image_Type = ifelse(Percent < 0.02, "Other", Image_Type))

PieChart(Image_Type,
         data = long_image_data,
         hole = 0.5,
         values_size = 1.5,
         labels = "%",  
         # label size
         main = "Distribution of Image Types in Dataset",
         fill = "viridis",
         #pdf_file = "image_type_donut_chart.png",
         width = 6.5,
         height = 8)
## >>> Parameters  values, values_color, etc. now renamed to:  labels, labels_color, etc.
##     Old parameter names will stop working in the future.

## >>> suggestions
## PieChart(Image_Type, hole=0)  # traditional pie chart
## PieChart(Image_Type, labels="%")  # display %'s on the chart
## PieChart(Image_Type)  # bar chart
## Plot(Image_Type)  # bubble plot
## Plot(Image_Type, labels="count")  # lollipop plot 
## 
## --- Image_Type --- 
## 
## Image_Type  Count   Prop 
## ------------------------- 
##       Bean    277   0.170 
##        Hop    720   0.442 
## Mite Brush    189   0.116 
##      Other    103   0.063 
##  Raspberry     34   0.021 
## Strawberry     68   0.042 
##  Synthetic    200   0.123 
##     Tomato     39   0.024 
## ------------------------- 
##      Total   1630   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 1750.417, df = 7, p-value = 0.000
# Create a dataframe with your counts
class_data <- data.frame(
  Image_Type = c("Viable egg", "Immature", "Dead mite", "Adult female", "Adult male"),
  Count = c(18324, 7435, 3182, 1847, 1471)
)

# Calculate total and percent
class_data <- class_data %>%
  mutate(Percent = Count / sum(Count))

# Repeat rows to prepare for PieChart
long_class_data <- class_data[rep(1:nrow(class_data), times = class_data$Count), , drop = FALSE]

# Group categories <2% into "Other"
# NOTE(review): this grouping never fires here -- the smallest class
# (Adult male) is 4.6% of objects per the table below -- so it is kept only
# for symmetry with the image-type chart above.
long_class_data <- long_class_data %>%
  mutate(Image_Type = ifelse(Percent < 0.02, "Other", Image_Type))

# Create the donut pie chart
PieChart(Image_Type,
         data = long_class_data,
         hole = 0.5,
         values_size = 1.5,
         labels = "%",  # show percentages on the chart
         main = "Distribution of Object Classes in Annotated Dataset",
         fill = "viridis",
         #pdf_file = "object_class_donut_chart.pdf",
         width = 7,
         height = 8)
## >>> Parameters  values, values_color, etc. now renamed to:  labels, labels_color, etc.
##     Old parameter names will stop working in the future.

## >>> suggestions
## PieChart(Image_Type, hole=0)  # traditional pie chart
## PieChart(Image_Type, labels="%")  # display %'s on the chart
## PieChart(Image_Type)  # bar chart
## Plot(Image_Type)  # bubble plot
## Plot(Image_Type, labels="count")  # lollipop plot 
## 
## --- Image_Type --- 
## 
##   Image_Type   Count   Prop 
## ---------------------------- 
## Adult female    1847   0.057 
##   Adult male    1471   0.046 
##    Dead mite    3182   0.099 
##     Immature    7435   0.230 
##   Viable egg   18324   0.568 
## ---------------------------- 
##        Total   32259   1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 30785.201, df = 4, p-value = 0.000

Charts reflecting just training data

# --- All model data combined ---

# Train/validation instance counts per class and source for each model.
# Real training/validation counts are identical across models; the models
# differ in which classes they include: v210 has no Dead_mite or Adult_male
# rows (three classes) and v211 has no Dead_mite rows (four classes).
all_data <- tribble(
  ~Model, ~Dataset, ~Class, ~Source, ~Count,
  # v209
  "v209", "Training", "Viable_egg", "Real", 9504,
  "v209", "Training", "Viable_egg", "Synthetic", 105,
  "v209", "Training", "Immature", "Real", 2726,
  "v209", "Training", "Immature", "Synthetic", 264,
  "v209", "Training", "Adult_female", "Real", 472,
  "v209", "Training", "Adult_female", "Synthetic", 672,
  "v209", "Training", "Dead_mite", "Real", 517,
  "v209", "Training", "Dead_mite", "Synthetic", 538,
  "v209", "Training", "Adult_male", "Real", 208,
  "v209", "Training", "Adult_male", "Synthetic", 746,
  "v209", "Validation", "Viable_egg", "Real", 1316,
  "v209", "Validation", "Immature", "Real", 544,
  "v209", "Validation", "Adult_female", "Real", 78,
  "v209", "Validation", "Dead_mite", "Real", 116,
  "v209", "Validation", "Adult_male", "Real", 39,
  
  # v210
  "v210", "Training", "Viable_egg", "Real", 9504,
  "v210", "Training", "Viable_egg", "Synthetic", 105,
  "v210", "Training", "Immature", "Real", 2726,
  "v210", "Training", "Immature", "Synthetic", 264,
  "v210", "Training", "Adult_female", "Real", 472,
  "v210", "Training", "Adult_female", "Synthetic", 672,
  "v210", "Validation", "Viable_egg", "Real", 1316,
  "v210", "Validation", "Immature", "Real", 544,
  "v210", "Validation", "Adult_female", "Real", 78,
  
  # v211
  "v211", "Training", "Viable_egg", "Real", 9504,
  "v211", "Training", "Viable_egg", "Synthetic", 105,
  "v211", "Training", "Immature", "Real", 2726,
  "v211", "Training", "Immature", "Synthetic", 264,
  "v211", "Training", "Adult_female", "Real", 472,
  "v211", "Training", "Adult_female", "Synthetic", 672,
  "v211", "Training", "Adult_male", "Real", 208,
  "v211", "Training", "Adult_male", "Synthetic", 746,
  "v211", "Validation", "Viable_egg", "Real", 1316,
  "v211", "Validation", "Immature", "Real", 544,
  "v211", "Validation", "Adult_female", "Real", 78,
  "v211", "Validation", "Adult_male", "Real", 39
)



# --- Order classes by total instance count ---

# Total instances per class, largest first; used as factor levels throughout.
class_order <- all_data %>%
  count(Class, wt = Count, name = "Total") %>%
  arrange(desc(Total)) %>%
  pull(Class)

# Apply order to Class and Model
all_data <- all_data %>%
  mutate(
    Class = factor(Class, levels = class_order),
    Model = factor(Model, levels = c("v209", "v211", "v210")),
    Dataset = factor(Dataset, levels = c("Training", "Validation"))
  )

# Test-set instance counts per class for the three test sets. These counts are
# shared across models; they are attached to v209 here and duplicated for the
# other models by the add_models table further down.
test_data <- tribble(
  ~Class, ~Test_Set, ~Instances,
  "Adult_female", "Leaf Test Set", 197,
  "Adult_male", "Leaf Test Set", 74,
  "Dead_mite", "Leaf Test Set", 71,
  "Immature", "Leaf Test Set", 705,
  "Viable_egg", "Leaf Test Set", 3402,
  "Adult_female", "Acaricide Test Set", 88,
  "Adult_male", "Acaricide Test Set", 10,
  "Dead_mite", "Acaricide Test Set", 97,
  "Immature", "Acaricide Test Set", 10,
  "Viable_egg", "Acaricide Test Set", 504,
  "Adult_female", "Original Test Set", 62,
  "Adult_male", "Original Test Set", 55,
  "Dead_mite", "Original Test Set", 103,
  "Immature", "Original Test Set", 437,
  "Viable_egg", "Original Test Set", 1184
) %>%
  # rename to match all_data's column names so bind_rows() aligns the columns
  dplyr::rename(Source = Test_Set, Count = Instances) %>%
  mutate(
    Model = "v209",       # or change to "All" or NA if not model-specific
    Dataset = "Test"
  )

# Combine test data with all_data
combined_data <- bind_rows(all_data, test_data)

# Replicate the shared test-set counts for v210 and v211, restricted to the
# classes each model actually predicts (v210: no Adult_male/Dead_mite;
# v211: no Dead_mite).
add_models <- tribble(
  ~Model, ~Dataset, ~Class, ~Source, ~Count,
"v210","Test","Adult_female","Leaf Test Set", 197,
"v210","Test","Immature","Leaf Test Set", 705,
"v210","Test","Viable_egg","Leaf Test Set", 3402,
"v211","Test","Adult_female","Leaf Test Set", 197,
"v211","Test","Immature","Leaf Test Set", 705,
"v211","Test","Viable_egg","Leaf Test Set", 3402,
"v211","Test","Adult_male","Leaf Test Set", 74,
"v211","Test","Adult_female","Acaricide Test Set", 88,
"v211","Test","Immature","Acaricide Test Set", 10,
"v211","Test","Viable_egg","Acaricide Test Set", 504,
"v211","Test","Adult_male","Acaricide Test Set", 10,
"v210","Test","Adult_female","Acaricide Test Set", 88,
"v210","Test","Immature","Acaricide Test Set", 10,
"v210","Test","Viable_egg","Acaricide Test Set", 504,
"v211","Test","Adult_female","Original Test Set", 62,
"v211","Test","Immature","Original Test Set", 437,
"v211","Test","Viable_egg","Original Test Set", 1184,
"v211","Test","Adult_male","Original Test Set", 55,
"v210","Test","Adult_female","Original Test Set", 62,
"v210","Test","Immature","Original Test Set", 437,
"v210","Test","Viable_egg","Original Test Set", 1184)

combined_data <- bind_rows(combined_data, add_models)

# Reorder factors for Class and Dataset (bind_rows() with character rows
# demotes the factor columns, so the levels are re-applied here)
combined_data <- combined_data %>%
  mutate(
    Class = factor(Class, levels = class_order),
    Model = factor(Model, levels = c("v209", "v211", "v210")),
    Dataset = factor(Dataset, levels = c("Training", "Validation", "Test"))
  )


combined_data <- combined_data %>%
  mutate(
    Dataset = factor(Dataset, levels = c("Training", "Validation", "Test")),
    Model = forcats::fct_recode(Model,
      "V209 – Five Classes" = "v209",
      "V211 – Four Classes" = "v211",
      "V210 – Three Classes" = "v210"
    ),
    # Order Facet by Dataset, then Model
    Facet = interaction(Model, Dataset, sep = " – "),
    Facet = fct_reorder(Facet, as.numeric(Dataset))  # Order by Dataset level
  )

# Stacked bars of instance counts per class, one facet per (model, dataset).
# Note: "Real" and "Original Test Set" intentionally share the same colour
# (#3F648A) since the original test set is drawn from the real data.
p <- ggplot(combined_data, aes(x = Class, y = Count, fill = Source)) +
  geom_bar(stat = "identity", position = "stack", color = "black") +
  facet_wrap(~ Facet, scales = "free_y", ncol = 3) +
  scale_fill_manual(
    name = "Source",
    values = c(
      "Real" = "#3F648A",
      "Synthetic" = "#4DA29D",
      "Original Test Set" = "#3F648A",
      "Leaf Test Set" = "#F8E665",
      "Acaricide Test Set" = "#8FCF63"
    ),
    breaks = c("Real", "Synthetic", "Original Test Set", "Leaf Test Set", "Acaricide Test Set"),
    labels = c("Real (Train/Val)", "Synthetic (Train)", "Original Test Set", "Leaf Test Set", "Acaricide Test Set")
  ) +
  labs(
    title = "Train, Validation, and Test Class Breakdown by Model",
    x = "Class", y = "Instance Count"
  ) +
  theme_classic(base_size = 14) +
  theme(
    strip.background = element_rect(fill = "white", color = "black"),
    strip.placement = "outside",
    axis.text.x = element_text(angle = 30, hjust = 1),
    legend.position = "bottom",
    legend.title = element_text(face = "bold")
  )

p

# Save
#ggsave("model_class_breakdown_with_test.svg", p, width = 8, height = 11, dpi = 300)

Unseen host performance evaluation

# Keep only unpadded rows.
# NOTE(review): the original comment claimed "keep only Image_Names found in
# all three models", but no cross-model filter is applied; the
# group_by(Image_name)/ungroup() pair around the Padded filter was a no-op and
# has been removed (the result is identical). If a cross-model filter is
# intended, use the n_distinct(Model_version) >= 3 idiom from the hop_data
# chunk above.
df_sub <- df2 %>%
  filter(Padded == "FALSE")

# One row per (model, image) with just the columns we plot, restricted to the
# three hosts held out from training.
df_sub_pre_recall_unique_unseen_hosts <- df_sub %>%
  dplyr::select(Model_version,
                Image_name,
                Number_objects_in_image,
                Host,
                Cultivar,
                Mite_Brush_or_Leaf,
                Overall_Recall,
                Overall_Precision) %>%
  distinct() %>%
  filter(Host %in% c("Apple", "Nasturtium", "Pear"))

# Precision for entire leaf set (unseen hosts only)
df_sub_pre_recall_unique_unseen_hosts %>%
  ggplot(., aes(x = Number_objects_in_image, y = Overall_Precision)) +
  geom_jitter(alpha = 0.5, width = 0.2, height = 0.02) +
  geom_smooth(method = "loess") +
  facet_grid(Model_version ~ Mite_Brush_or_Leaf, scales = "free_x") +
  ggtitle("Precision vs. Number of Objects in Image (Unseen Hosts)") +
  xlab("Number of Ground Truth Objects") +
  ylab("Per-Image Precision") +
  theme_classic() + 
  theme(
    strip.text = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
## `geom_smooth()` using formula = 'y ~ x'

# Recall for entire leaf set (unseen hosts only)
# Fix: the title previously read "All Seen Hosts (n=188)", but this data frame
# is filtered to the three UNSEEN hosts (Apple, Nasturtium, Pear) -- a stale
# copy-paste from the seen-host plot. Title corrected to match the data.
df_sub_pre_recall_unique_unseen_hosts %>%
  ggplot(., aes(x = Number_objects_in_image, y = Overall_Recall)) +
  geom_jitter(alpha = 0.5, width = 0.2, height = 0.02) +
  geom_smooth(method = "loess") +
  facet_grid(Model_version ~ Mite_Brush_or_Leaf, scales = "free_x") +
  ggtitle("Recall vs. Number of Objects in Image (Unseen Hosts)") +
  xlab("Number of Ground Truth Objects") +
  ylab("Per-Image Recall") +
  theme_classic() + 
  theme(
    strip.text = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
## `geom_smooth()` using formula = 'y ~ x'

# Per-(Host, Model_version) label positions for the unseen-host boxplots.
# Fix: min() now uses na.rm = TRUE so a single NA recall value cannot turn
# label_y into NA and silently drop the "n = ..." label (matches the
# na.rm = TRUE convention used for the precision labels earlier in the file).
df_labels <- df_sub_pre_recall_unique_unseen_hosts %>%
  group_by(Host, Model_version) %>%
  summarise(
    min_recall = min(Overall_Recall, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  mutate(label_y = pmax(0, min_recall - 0.125))


# Recall
# Boxplots of per-image recall by host (unseen hosts), one facet per model.
# geom_text anchors labels by Host; this matches the fct_reorder() axis
# because discrete scales match positions by level name.
ggplot(df_sub_pre_recall_unique_unseen_hosts, aes(x = fct_reorder(Host, Overall_Recall), y = Overall_Recall)) +
  geom_boxplot() +
geom_jitter(aes(fill = Host), 
            position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.75), 
            alpha = 0.5,
            shape=21) +
  geom_text(
    data = df_labels,
    aes(x = Host, y = label_y, label = paste0("n = ", n)),
    inherit.aes = FALSE,
    size = 3
  ) +
  coord_flip() +
  facet_wrap(~Model_version) +
  ylim(0, 1) +
  theme_classic() +
  ggtitle("Recall by Host") +
  ylab("Recall") +
  theme(
    legend.position = "none",
    axis.title.y = element_blank()
  )+
  scale_fill_viridis_d()

# Precision
# Same layout as the recall plot above.
# NOTE(review): df_labels' label_y was computed from the minimum RECALL but is
# reused on this precision plot, so labels may not sit just below the
# precision boxes -- confirm whether a precision-specific label table is
# wanted.
ggplot(df_sub_pre_recall_unique_unseen_hosts, aes(x = fct_reorder(Host, Overall_Precision), y = Overall_Precision)) +
  geom_boxplot() +
geom_jitter(aes(fill = Host), 
            position = position_jitterdodge(jitter.width = 0.5, dodge.width = 0.75), 
            alpha = 0.5,
            shape=21) +
  geom_text(
    data = df_labels,
    aes(x = Host, y = label_y, label = paste0("n = ", n)),
    inherit.aes = FALSE,
    size = 3
  ) +
  coord_flip() +
  facet_wrap(~Model_version) +
  ylim(0, 1) +
  theme_classic() +
  ggtitle("Precision by Host") +
  ylab("Precision") +
  theme(
    legend.position = "none",
    axis.title.y = element_blank()
  )+
  scale_fill_viridis_d()

# Reshape to long: one row per (image, metric), with Metric in
# {"Recall", "Precision"} and the metric's value in Value.
df_long <- df_sub_pre_recall_unique_unseen_hosts %>%
  pivot_longer(c(Overall_Recall, Overall_Precision),
               names_to = "Metric",
               values_to = "Value") %>%
  mutate(Metric = dplyr::recode(Metric,
                                Overall_Recall = "Recall",
                                Overall_Precision = "Precision"))

# define color-blind friendly palette (for accessibility and consistency)
pal2 <- c("#4E4B80", "#4CA09C")

# Per-Image Precision and Recall Boxplots for Nasturtium, Apple, and Pear
# Fix: plot title read "... by For Unseen Hosts"; corrected to "... for".
ggplot(df_long, aes(x = fct_reorder(Host, Value), y = Value, fill = Metric)) +
  geom_boxplot(position = position_dodge(width = 0.75), outlier.shape = NA) +
  geom_jitter(position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.75),
              shape = 21, alpha = 0.5) +
  #geom_text(
  #  data = df_labels,  # Make sure this is compatible with new data structure or adjust accordingly
  #  aes(x = Host, y = label_y, label = paste0("n = ", n)),
  #  inherit.aes = FALSE,
  #  size = 3
  #) +
  coord_flip() +
  facet_wrap(~Model_version) +
  ylim(0, 1) +
  theme_classic() +
  ggtitle("Per-Image Precision and Recall for Unseen Hosts") +
  ylab("Value") +
  theme(
    axis.title.y = element_blank()
  ) +
  scale_fill_manual(values = pal2)

# save
#ggsave("precision_recall_on_unseen_hosts_dodged.svg", width = 6, height = 6, dpi = 300)


# add a column called "pretrained" and set all values in this dataset to no
df_sub_pre_recall_unique_unseen_hosts <- df_sub_pre_recall_unique_unseen_hosts %>%
  mutate(pretrained = "FALSE")

# concat rows for df_sub_pre_recall_unique_unseen_hosts and df_sub_pre_recall_unique
df_sub_pre_recall_unique_combined <- bind_rows(
  df_sub_pre_recall_unique_unseen_hosts,
  df_sub_pre_recall_unique
)

# Pivot long by recall or precision value
df_long_all <- df_sub_pre_recall_unique_combined %>%
  pivot_longer(cols = c(Overall_Recall, Overall_Precision),
               names_to = "Metric",
               values_to = "Value") %>%
  mutate(Metric = dplyr::recode(Metric,
                         Overall_Recall = "Recall",
                         Overall_Precision = "Precision"))

# replace NA in Pretrained column with "FALSE"
df_long_all <- df_long_all %>%
  mutate(Pretrained = ifelse(is.na(Pretrained), "FALSE", Pretrained)) %>%
  dplyr::select(!(pretrained))

# df that sums unseen and seen hosts in boxplot
df_labels_seen_unseen <- df_long_all %>%
  dplyr::filter(Metric == "Recall") %>%
  group_by(Pretrained, Model_version) %>%
  summarise(
    min_value = min(Value),
    n = n(),
    .groups = "drop"
  ) %>%
  mutate(label_y = pmax(0, min_value - 0.125))

# plot by host
# NOTE(review): this panel includes both seen and unseen hosts (everything but
# Poppy/Columbine), yet the title says "Unseen Hosts" -- likely a stale
# copy-paste; confirm the intended title before publishing.
df_long_all %>%
  dplyr::filter(!Host %in% c("Poppy","Columbine")) %>%
  ggplot(aes(x = fct_reorder(Host, Value), y = Value, fill = Metric)) +
                  geom_boxplot(position = position_dodge(width = 0.75), outlier.shape = NA) +
                  geom_jitter(position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
                              shape = 21, alpha = 0.5) +
                  coord_flip() +
                  facet_wrap(~Model_version) +
                  ylim(0, 1) +
                  theme_classic() +
                  ggtitle("Per-Image Precision and Recall by For Unseen Hosts") +
                  ylab("Value") +
                  theme(
                    axis.title.y = element_blank()) +
  scale_fill_manual(values = pal2)

# grouped
# Pools hosts into two boxes per metric via the Pretrained flag -- presumably
# "TRUE" = host present in training data, "FALSE" = held-out host (matches the
# "Seen and Unseen Hosts" title); verify against how Pretrained is set in df2.
df_long_all %>%
  dplyr::filter(!Host %in% c("Poppy","Columbine")) %>%
                  ggplot(aes(x = fct_reorder(Pretrained, Value), y = Value, fill = Metric)) +
                  geom_boxplot(position = position_dodge(width = 0.75), outlier.shape = NA) +
                  geom_jitter(position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.75),
                              shape = 21, alpha = 0.5) +
                  coord_flip() +
                  facet_wrap(~Model_version) +
                  ylim(0, 1) +
                  theme_classic() +
                  ggtitle("Per-Image Precision and Recall Comparing Seen and Unseen Hosts") +
                  ylab("Value") +
                  theme(
                    axis.title.y = element_blank()) +
    scale_fill_manual(values = pal2)

# save
#ggsave("precision_recall_by_host_unseen_hosts.png", width = 8, height = 6, dpi = 300)

# Precision observations only, with the separately-analysed hosts
# dropped
df_precision <- df_long_all %>%
  dplyr::filter(Metric == "Precision", !Host %in% c("Poppy", "Columbine"))

# Pretrained as a factor for modelling
df_precision$Pretrained <- factor(df_precision$Pretrained)

# Linear model: does pretraining shift per-image precision?
lm_test <- lm(Value ~ Pretrained, data = df_precision)
summary(lm_test)
## 
## Call:
## lm(formula = Value ~ Pretrained, data = df_precision)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.92531 -0.00132  0.02418  0.02418  0.07469 
## 
## Coefficients:
##                Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)    0.925314   0.006856 134.972 < 0.0000000000000002 ***
## PretrainedTRUE 0.050506   0.007774   6.497       0.000000000155 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08563 on 700 degrees of freedom
## Multiple R-squared:  0.05688,    Adjusted R-squared:  0.05553 
## F-statistic: 42.21 on 1 and 700 DF,  p-value: 0.0000000001553
# Recall observations only, same host exclusions as the precision set
df_recall <- df_long_all %>%
  dplyr::filter(Metric == "Recall", !Host %in% c("Poppy", "Columbine"))

# Pretrained as a factor for modelling
df_recall$Pretrained <- factor(df_recall$Pretrained)

# Linear model: does pretraining shift per-image recall?
lm_test_recall <- lm(Value ~ Pretrained, data = df_recall)
summary(lm_test_recall)
## 
## Call:
## lm(formula = Value ~ Pretrained, data = df_recall)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79824 -0.06943  0.03557  0.09757  0.20176 
## 
## Coefficients:
##                Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)     0.79824    0.01156   69.07 < 0.0000000000000002 ***
## PretrainedTRUE  0.10418    0.01310    7.95  0.00000000000000749 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1444 on 700 degrees of freedom
## Multiple R-squared:  0.08281,    Adjusted R-squared:  0.0815 
## F-statistic:  63.2 on 1 and 700 DF,  p-value: 0.000000000000007488
# precision
# Rank-based (Wilcoxon) alternative to lm_test, robust to the
# non-normal residuals shown by the diagnostics below
wilcox.test(
  Value ~ Pretrained,
  data = df_precision
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Value by Pretrained
## W = 41044, p-value = 0.3813
## alternative hypothesis: true location shift is not equal to 0
# recall (comment corrected: this tests the recall data, not precision)
wilcox.test(
  Value ~ Pretrained,
  data = df_recall
)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Value by Pretrained
## W = 33716, p-value = 0.00004318
## alternative hypothesis: true location shift is not equal to 0
# Visual check
# Standard lm diagnostic plots (residuals vs fitted, Q-Q, scale-
# location, leverage) for both models
plot(lm_test_recall)

plot(lm_test)

# Formal test for normality
# Shapiro-Wilk on the residuals; both strongly reject normality,
# which motivates the rank-based Wilcoxon tests above
shapiro.test(residuals(lm_test_recall))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm_test_recall)
## W = 0.89899, p-value < 0.00000000000000022
shapiro.test(residuals(lm_test))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm_test)
## W = 0.61681, p-value < 0.00000000000000022
# sum median for each host model combination
# Median Value per Host x Model_version x Metric, sorted best-first;
# print(n = Inf) displays all 60 rows in the knitted output below
median_perform<- df_long_all %>%
  group_by(Host, Model_version, Metric) %>%
  summarise(Median_Value = median(Value, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(Median_Value)) %>%
  print(n = Inf)
## # A tibble: 60 × 4
##    Host       Model_version Metric    Median_Value
##    <chr>              <dbl> <chr>            <dbl>
##  1 Apple                209 Precision        1    
##  2 Apple                210 Precision        1    
##  3 Apple                210 Recall           1    
##  4 Apple                211 Precision        1    
##  5 Bean                 209 Precision        1    
##  6 Bean                 210 Precision        1    
##  7 Bean                 210 Recall           1    
##  8 Bean                 211 Precision        1    
##  9 Bean                 211 Recall           1    
## 10 Hop                  209 Precision        1    
## 11 Hop                  210 Precision        1    
## 12 Hop                  211 Precision        1    
## 13 Nasturtium           209 Precision        1    
## 14 Nasturtium           210 Precision        1    
## 15 Nasturtium           211 Precision        1    
## 16 Pear                 209 Precision        1    
## 17 Pear                 210 Precision        1    
## 18 Pear                 211 Precision        1    
## 19 Raspberry            209 Precision        1    
## 20 Raspberry            210 Precision        1    
## 21 Raspberry            211 Precision        1    
## 22 Strawberry           209 Precision        1    
## 23 Strawberry           210 Precision        1    
## 24 Strawberry           211 Precision        1    
## 25 Tomato               209 Precision        1    
## 26 Tomato               211 Precision        1    
## 27 Tomato               210 Precision        0.984
## 28 Columbine            210 Precision        0.978
## 29 Hop                  210 Recall           0.974
## 30 Bean                 209 Recall           0.972
## 31 Columbine            211 Precision        0.939
## 32 Hop                  209 Recall           0.939
## 33 Hop                  211 Recall           0.938
## 34 Strawberry           210 Recall           0.938
## 35 Nasturtium           210 Recall           0.928
## 36 Raspberry            210 Recall           0.928
## 37 Columbine            209 Precision        0.928
## 38 Strawberry           209 Recall           0.918
## 39 Raspberry            211 Recall           0.916
## 40 Columbine            210 Recall           0.911
## 41 Strawberry           211 Recall           0.907
## 42 Columbine            211 Recall           0.903
## 43 Columbine            209 Recall           0.893
## 44 Poppy                210 Precision        0.891
## 45 Poppy                211 Precision        0.882
## 46 Pear                 209 Recall           0.875
## 47 Pear                 210 Recall           0.875
## 48 Pear                 211 Recall           0.875
## 49 Tomato               210 Recall           0.875
## 50 Nasturtium           209 Recall           0.845
## 51 Tomato               211 Recall           0.828
## 52 Tomato               209 Recall           0.822
## 53 Poppy                209 Precision        0.820
## 54 Poppy                210 Recall           0.819
## 55 Poppy                211 Recall           0.800
## 56 Apple                211 Recall           0.75 
## 57 Raspberry            209 Recall           0.75 
## 58 Poppy                209 Recall           0.746
## 59 Nasturtium           211 Recall           0.732
## 60 Apple                209 Recall           0.7

Miticidal data

Tidy and summarize data

# Parse the bifenazate dose from each image name. The token just
# before "ppm" encodes the dose, with "_" standing in for a decimal
# point (e.g. "307-3_125ppm" -> 3.125). Names without a ppm token are
# untreated controls and get a dose of 0.
floramite_data <- df3 %>%
  mutate(
    ppm_raw = str_extract(Image_name, "\\d+_?\\d*(?=ppm)"),
    ppm = as.numeric(str_replace(ppm_raw, "_", ".")),
    ppm = ifelse(is.na(ppm), 0, ppm)
  ) %>%
  # drop the intermediate raw token
  dplyr::select(-ppm_raw)
  
# Total CV detections and ground-truth counts per leaf disk, dose,
# class, and confidence threshold
summary_floramite <- floramite_data %>%
  group_by(Image_name, ppm, Class, Confidence_threshold) %>%
  summarise(cv_detections = sum(Total_detections),
            gt_detections = sum(Total_GT),
            .groups = "drop")

# Spread per-class counts to one column per class, separately for the
# CV detections ("cv_" prefix) and the ground truth ("gt_" prefix);
# classes absent from a disk get 0
cv_wide <- summary_floramite %>%
  dplyr::select(Image_name, ppm, Confidence_threshold, Class, cv_detections) %>%
  pivot_wider(names_from = Class, values_from = cv_detections,
              values_fill = 0, names_prefix = "cv_")

gt_wide <- summary_floramite %>%
  dplyr::select(Image_name, ppm, Confidence_threshold, Class, gt_detections) %>%
  pivot_wider(names_from = Class, values_from = gt_detections,
              values_fill = 0, names_prefix = "gt_")

# Merge the CV and GT wide tables and derive per-disk metrics, then
# keep only the operating confidence threshold (0.2).
# Fix: the original computed cv_fecundity twice -- first unguarded
# (0/0 -> NaN, x/0 -> Inf) and then again with a zero-female guard --
# and left gt_fecundity unguarded until much later. Each fecundity is
# now computed once, returning NA for disks with no adult females.
wide_df <- left_join(cv_wide, gt_wide,
                     by = c("Image_name", "ppm", "Confidence_threshold")) %>%
  mutate(
    cv_total_mite = cv_Adult_female + cv_Adult_male + cv_Immature + cv_Dead_mite,
    gt_total_mite = gt_Adult_female + gt_Adult_male + gt_Immature + gt_Dead_mite,

    # NOTE(review): mortality is still NaN for disks with zero mites;
    # downstream drm() fits weight by total_mite, so confirm those
    # rows are handled as intended
    cv_mortality = cv_Dead_mite / cv_total_mite,
    gt_mortality = gt_Dead_mite / gt_total_mite,

    cv_fecundity = ifelse(cv_Adult_female > 0,
                          cv_Viable_egg / cv_Adult_female,
                          NA),
    gt_fecundity = ifelse(gt_Adult_female > 0,
                          gt_Viable_egg / gt_Adult_female,
                          NA)
  ) %>%
  dplyr::filter(Confidence_threshold == "0.2")

Fit miticide GLMs

# GLM for model-predicted mortality (cv)
# Binomial GLM of (dead mites, live adult females) against log dose;
# log1p keeps the 0-ppm controls finite.
# NOTE(review): the "failure" column is cv_Adult_female only, so this
# models dead-vs-adult-female odds rather than mortality among all
# live stages -- confirm this denominator is intended.
mortality_model_cv <- glm(
  cbind(cv_Dead_mite, cv_Adult_female) ~ log1p(ppm),
  data = wide_df,
  family = binomial
)

summary(mortality_model_cv)
## 
## Call:
## glm(formula = cbind(cv_Dead_mite, cv_Adult_female) ~ log1p(ppm), 
##     family = binomial, data = wide_df)
## 
## Coefficients:
##             Estimate Std. Error z value   Pr(>|z|)    
## (Intercept)  -2.0973     0.4344  -4.828 0.00000138 ***
## log1p(ppm)    0.4789     0.1136   4.216 0.00002489 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 93.970  on 54  degrees of freedom
## Residual deviance: 73.017  on 53  degrees of freedom
## AIC: 132.18
## 
## Number of Fisher Scoring iterations: 4
# GLM for human-labeled mortality (gt)
# Same binomial specification as mortality_model_cv, fitted to the
# manual (ground-truth) counts for comparison
mortality_model_gt <- glm(cbind(gt_Dead_mite, gt_Adult_female) ~ log1p(ppm), 
                          data = wide_df, family = binomial)
summary(mortality_model_gt)
## 
## Call:
## glm(formula = cbind(gt_Dead_mite, gt_Adult_female) ~ log1p(ppm), 
##     family = binomial, data = wide_df)
## 
## Coefficients:
##             Estimate Std. Error z value     Pr(>|z|)    
## (Intercept)  -1.9885     0.4155  -4.786 0.0000017001 ***
## log1p(ppm)    0.6384     0.1152   5.544 0.0000000296 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 103.445  on 54  degrees of freedom
## Residual deviance:  64.593  on 53  degrees of freedom
## AIC: 124.28
## 
## Number of Fisher Scoring iterations: 4

Both models show highly significant effects of increasing bifenazate on mite mortality.

The manual annotations (gt) model has:

  • A slightly higher slope, indicating greater sensitivity to ppm
  • A better fit, with lower residual deviance and AIC

The model-predicted (cv) output still closely mirrors this trend, which supports the validity of the detection pipeline.

So, the detection model slightly underestimates the sensitivity, but trends are consistent.

LC50

# LC50 for model-predicted mortality
# Two-parameter log-logistic dose-response fit. LL.4's parameters are
# (b, c, d, e) = (slope, lower limit, upper limit, ED50); fixing
# c = 0 and d = 1 constrains mortality to [0, 1], leaving slope and
# ED50 free. Disks are weighted by their total mite count.
lc50_cv_fixed <- drm(
  cv_Dead_mite / cv_total_mite ~ ppm,
  weights = cv_total_mite,
  data = wide_df,
  fct = LL.4(fixed = c(NA, 0, 1, NA))  # fix c=0, d=1
)

summary(lc50_cv_fixed)
## 
## Model fitted: Log-logistic (ED50 as parameter) (2 parms)
## 
## Parameter estimates:
## 
##                Estimate Std. Error t-value  p-value   
## b:(Intercept)  -0.36049    0.12155 -2.9659 0.004518 **
## e:(Intercept) 212.40451  138.56675  1.5329 0.131256   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error:
## 
##  1.02071 (53 degrees of freedom)
# Delta-method CI for the LC50
# NOTE(review): the cv LC50 interval spans zero (-65.5 to 490), so the
# model-derived LC50 is poorly constrained
ED(lc50_cv_fixed, 50, interval = "delta")  # LC50 ± CI
## 
## Estimated effective doses
## 
##        Estimate Std. Error   Lower   Upper
## e:1:50  212.405    138.567 -65.525 490.334
plot(lc50_cv_fixed, xlab = "Bifenazate (ppm)", ylab = "Model-predicted mortality")

# LC50 for ground-truth mortality
# Same two-parameter log-logistic specification as the cv fit, on the
# manual counts
lc50_gt_fixed <- drm(gt_Dead_mite / gt_total_mite ~ ppm,
               weights = gt_total_mite,
               data = wide_df,
               fct = LL.4(fixed = c(NA, 0, 1, NA))  # fix c=0, d=1
)

summary(lc50_gt_fixed)
## 
## Model fitted: Log-logistic (ED50 as parameter) (2 parms)
## 
## Parameter estimates:
## 
##               Estimate Std. Error t-value    p-value    
## b:(Intercept) -0.50015    0.11078 -4.5146 0.00003569 ***
## e:(Intercept) 26.30811    7.56728  3.4766   0.001022 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error:
## 
##  0.884602 (53 degrees of freedom)
# Delta-method CI for the ground-truth LC50 (well constrained, unlike
# the cv estimate above)
ED(lc50_gt_fixed, 50, interval = "delta")
## 
## Estimated effective doses
## 
##        Estimate Std. Error   Lower   Upper
## e:1:50  26.3081     7.5673 11.1301 41.4862
plot(lc50_gt_fixed, xlab = "Bifenazate (ppm)", ylab = "Manual mortality")

Overlay LC50 curves

# Common log-spaced dose grid, from 1 ppm up to the maximum tested
dose_range <- exp(seq(log(1), log(max(wide_df$ppm, na.rm = TRUE)), length.out = 100))

# Fitted mortality from each dose-response model over the grid
pred_cv <- predict(lc50_cv_fixed, newdata = data.frame(ppm = dose_range))
pred_gt <- predict(lc50_gt_fixed, newdata = data.frame(ppm = dose_range))

# Base-graphics overlay: manual (gt) curve first, on a log x-axis
plot(dose_range, pred_gt,
     type = "l", lwd = 2, col = "#4E4B80", log = "x", ylim = c(0, 1),
     xlab = "Bifenazate (ppm, log scale)",
     ylab = "Mortality Rate",
     main = "Dose–Response: Manual vs Model Mortality")

# Add the model-predicted (cv) curve
lines(dose_range, pred_cv, col = "#4CA09C", lwd = 2, lty = 1)

# Reference line at 50% mortality (the LC50 level)
abline(h = 0.5, col = "gray", lty = 2)

# Legend distinguishing the two sources
legend("bottomright", legend = c("Manual (gt)", "Model (cv)"),
       col = c("#4E4B80", "#4CA09C"), lwd = 2, lty = c(1, 1))

Miticide assay fecundity assessment

# Check distributions
# Guard gt fecundity the same way as cv: disks with no adult females
# get NA rather than a division-by-zero result
wide_df <- wide_df %>%
  dplyr::mutate(
    gt_fecundity = ifelse(gt_Adult_female > 0,
                          gt_Viable_egg / gt_Adult_female,
                          NA)
  )

hist(wide_df$cv_fecundity, main = "Model fecundity", col = "#4CA09C")

hist(wide_df$gt_fecundity, main = "Manual fecundity", col = "#4E4B80")

# Variance-to-mean ratio of cv fecundity as an overdispersion check
# (values well above 1 indicate a Poisson model would be inadequate)
mean_fec <- mean(wide_df$cv_fecundity, na.rm = TRUE)
var_fec  <- var(wide_df$cv_fecundity, na.rm = TRUE)
overdispersion_ratio <- var_fec / mean_fec
overdispersion_ratio
## [1] 4.083909
# we have overdispersion and non-normal distribution. since we're using count data, let's go with a negative binomial 
# computer vision model
# NB regression of viable-egg counts on log dose, with an offset of
# log(females + 1) so expected counts scale with female number.
# NOTE(review): the +1 avoids log(0) for female-free disks but means
# the offset is not exactly log(females); predictions for disks with
# few females are inflated relative to a true per-female rate --
# confirm this is intentional.
cv_nb_model <- glm.nb(
  cv_Viable_egg ~ log1p(ppm) + offset(log(cv_Adult_female + 1)),
  data = wide_df %>% filter(!is.na(cv_Viable_egg))
)
summary(cv_nb_model)
## 
## Call:
## glm.nb(formula = cv_Viable_egg ~ log1p(ppm) + offset(log(cv_Adult_female + 
##     1)), data = wide_df %>% filter(!is.na(cv_Viable_egg)), init.theta = 3.041239812, 
##     link = log)
## 
## Coefficients:
##             Estimate Std. Error z value             Pr(>|z|)    
## (Intercept)   2.0176     0.1862  10.836 < 0.0000000000000002 ***
## log1p(ppm)   -0.3889     0.0594  -6.547      0.0000000000586 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.0412) family taken to be 1)
## 
##     Null deviance: 112.921  on 54  degrees of freedom
## Residual deviance:  66.999  on 53  degrees of freedom
## AIC: 317.19
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.041 
##           Std. Err.:  0.962 
## 
##  2 x log-likelihood:  -311.191
# ground truth model
# Same NB specification as cv_nb_model, fitted to the manual counts
# (including the same log(females + 1) offset -- see the note above
# that model)
gt_nb_model <- glm.nb(
  gt_Viable_egg ~ log1p(ppm) + offset(log(gt_Adult_female + 1)),
  data = wide_df %>% filter(!is.na(gt_Viable_egg))
)
summary(gt_nb_model)
## 
## Call:
## glm.nb(formula = gt_Viable_egg ~ log1p(ppm) + offset(log(gt_Adult_female + 
##     1)), data = wide_df %>% filter(!is.na(gt_Viable_egg)), init.theta = 3.027281889, 
##     link = log)
## 
## Coefficients:
##             Estimate Std. Error z value             Pr(>|z|)    
## (Intercept)  1.99865    0.18549  10.775 < 0.0000000000000002 ***
## log1p(ppm)  -0.31713    0.05903  -5.372         0.0000000778 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.0273) family taken to be 1)
## 
##     Null deviance: 97.723  on 54  degrees of freedom
## Residual deviance: 66.476  on 53  degrees of freedom
## AIC: 318.65
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.027 
##           Std. Err.:  0.941 
## 
##  2 x log-likelihood:  -312.649
# Scatter of model-detected fecundity with an NB smooth on log dose.
# NOTE(review): stat_smooth(method = "glm.nb") refits on the plotted
# eggs-per-female ratio, which is non-integer -- hence the dpois
# warnings below -- and ppm = 0 control rows are dropped by
# scale_x_log10(); consider plotting cv_nb_model's own predictions
# instead (as done in the next chunk).
ggplot(wide_df, aes(x = ppm, y = cv_fecundity)) +
  geom_point(alpha = 0.5) +
  stat_smooth(method = "glm.nb", formula = y ~ log1p(x), se = TRUE, color = "darkgreen") +
  scale_x_log10() +
  labs(title = "Fecundity with Negative Binomial Fit",
       x = "Bifenazate (ppm, log scale)",
       y = "Fecundity (eggs per female)") +
  theme_classic()
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.333333
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.200000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 2.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.666667
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 0.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1.500000
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Dose grid for the fitted fecundity curve.
# NOTE(review): the offset is log(cv_Adult_female + 1), so setting
# cv_Adult_female = 1 leaves a residual offset of log(2) -- the curve
# is the expected egg count for a single-female disk, not exactly a
# per-female rate (the original comment "to neutralize offset" was
# inaccurate; use cv_Adult_female = 0 to zero the offset entirely).
new_data <- data.frame(ppm = seq(0, max(wide_df$ppm, na.rm = TRUE), length.out = 100),
                       cv_Adult_female = 1)

# Predict expected viable eggs per female
new_data$pred <- predict(cv_nb_model, newdata = new_data, type = "response")

# Plot observed fecundity with the fitted curve overlaid.
# linewidth replaces the size aesthetic for lines, which was
# deprecated in ggplot2 3.4.0 (the original emitted a warning).
ggplot(wide_df, aes(x = ppm, y = cv_fecundity)) +
  geom_point(alpha = 0.4) +
  geom_line(data = new_data, aes(x = ppm, y = pred), color = "blue", linewidth = 1.2) +
  labs(
    title = "Predicted model-detected fecundity vs. Bifenazate ppm",
    y = "Viable eggs per female (cv)",
    x = "ppm"
  ) +
  theme_classic()
## Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).

Miticide publication-ish figures

# Reshape wide_df to long form: one row per (dose, source, class),
# where source is "gt" (manual) or "cv" (model)
class_summary <- wide_df %>%
  dplyr::select(ppm, starts_with("gt_"), starts_with("cv_")) %>%
  pivot_longer(
    cols = -ppm,
    names_to = c("source", "class"),
    names_pattern = "(gt|cv)_(.*)",
    values_to = "count"
  )

# Total counts across replicates
class_summary_agg <- class_summary %>%
  group_by(ppm, source, class) %>%
  summarise(total = sum(count, na.rm = TRUE), .groups = "drop")

# Dodged bars of total counts per class, dose, and source
# (geom_col is geom_bar(stat = "identity"))
ggplot(class_summary_agg, aes(x = factor(ppm), y = total, fill = source)) +
  geom_col(position = "dodge") +
  facet_wrap(~class, scales = "free_y") +
  labs(x = "Bifenazate (ppm)", y = "Total Count", title = "Class Detections by Source and Dose") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

lc50_pal <- c("#3F648A","#4DA29D")

# Enhanced LC50 overlay plot
# ggplot version of the base-graphics overlay above, reusing the same
# dose grid (dose_range) and fitted values (pred_gt / pred_cv).
# linewidth replaces the size aesthetic for lines, deprecated in
# ggplot2 3.4.0.
ggplot(data = data.frame(ppm = dose_range), aes(x = ppm)) +
  geom_line(aes(y = pred_gt, color = "Manual (GT)"), linewidth = 1.2) +
  geom_line(aes(y = pred_cv, color = "Model (CV)"), linewidth = 1.2) +
  geom_hline(yintercept = 0.5, linetype = "dotted", color = "gray") +
  scale_x_log10() +
  labs(
    title = "Dose-Response Curves for Mortality (Manual vs. Model)",
    x = "Bifenazate (ppm, log scale)",
    y = "Mortality Rate",
    color = "Source"
  ) +
  theme_classic() +
  scale_color_manual(values = lc50_pal)

# save (ggsave writes the most recent plot)
ggsave("lc50_overlay_plot.pdf", width = 8, height = 6, dpi = 300)


# Prediction grid for both fecundity models. Female counts of 1 leave
# a residual offset of log(1 + 1) = log(2) in each model, so the
# curves correspond to a single-female disk.
# NOTE(review): to zero the offsets entirely (pure exp(Xb) rate), set
# the *_Adult_female columns to 0 -- confirm the intended scale.
new_pred_data <- data.frame(ppm = seq(0, max(wide_df$ppm, na.rm = TRUE), length.out = 100),
                            cv_Adult_female = 1,
                            gt_Adult_female = 1)

new_pred_data$cv_pred <- predict(cv_nb_model, newdata = new_pred_data, type = "response")
new_pred_data$gt_pred <- predict(gt_nb_model, newdata = new_pred_data, type = "response")

# Plot both fecundity curves over the observed points. linewidth
# replaces the size aesthetic for lines (deprecated in ggplot2 3.4.0);
# ppm = 0 controls drop out under scale_x_log10(), per the warnings.
ggplot(wide_df, aes(x = ppm)) +
  geom_point(aes(y = cv_fecundity), alpha = 0.4, color = "#3F648A") +
  geom_point(aes(y = gt_fecundity), alpha = 0.4, color = "#4DA29D") +
  geom_line(data = new_pred_data, aes(y = cv_pred), color = "#3F648A", linewidth = 1.2) +
  geom_line(data = new_pred_data, aes(y = gt_pred), color = "#4DA29D", linewidth = 1.2) +
  labs(
    title = "Fecundity Decline with Bifenazate Dose",
    x = "Bifenazate (ppm)",
    y = "Viable Eggs per Female"
  ) +
  scale_x_log10() +
  theme_classic()
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("fecund_acaricide_overlay_plot.pdf", width = 8, height = 6, dpi = 300)
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).